Learning Vision Systems on Graphics Cards (MA-INF 4308)
Salih MARANGOZ (s6samara)
Elif Cansu YILDIZ (s6efyild)
# Enable reproducible (deterministic) cuBLAS kernels; CUBLAS_WORKSPACE_CONFIG=4096:8
# is required by PyTorch's deterministic-algorithms mode on CUDA >= 10.2.
# NOTE(review): the original comment said "non-deterministic", but this env var
# does the opposite — it makes cuBLAS results reproducible.
%env CUBLAS_WORKSPACE_CONFIG=4096:8
# Auto-reload edited local modules (dataset_utilities, training_utilities, models)
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Download IO Routines
#!wget "https://lmb.informatik.uni-freiburg.de/resources/datasets/IO.py" -nv -O io_routines.py
import os
import sys
from PIL import Image
import numpy as np
from torchvision import transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt
import numpy as np
from models.psmnet import PSMNet, HarderDisparityRegression
from models.gcnet import GCnet
from models.psmnet_basic import PSMNet as PSMNet_basic
import dataset_utilities as d_utils
import training_utilities as t_utils
# List the version of every imported module that exposes a __version__ attribute.
print("Module versions:")
version_lines = [f"> {module.__name__}: {module.__version__}"
                 for module in globals().values()
                 if getattr(module, '__version__', None)]
print('\n'.join(version_lines))
# Use the seaborn plot style for all figures in this notebook.
plt.style.use('seaborn')
Extracting all datasets may use more than 100 GB of space. The tree below may help you extract only what is needed. For example, we didn't use the 35mm_focallength data for the driving dataset. Modify DATASET_ROOT if your dataset folder is in another location.
├── labvision_project_folder
│ └── ...
├── dataset
├── driving
│ ├── disparity
│ │ └── 15mm_focallength
│ └── frames_cleanpass_webp
│ │ └── 15mm_focallength
├── flyingthings3d
│ ├── disparity
│ │ ├── TEST
│ │ └── TRAIN
│ └── frames_cleanpass_webp
│ │ ├── TEST
│ │ └── TRAIN
├── kitti
│ └── training
│ ├── image_2
│ ├── image_3
│ └── disp_occ_0
└── monkaa
├── disparity
│ ├── a_rain_of_stones_x2
│ └── ....
└── frames_cleanpass_webp
├── a_rain_of_stones_x2
└── ....
# Root folder that holds every stereo dataset; adjust if stored elsewhere.
DATASET_ROOT = "../../dataset"
# DATASET_ROOT = "../dataset"

# Per-dataset subdirectories under the root.
driving_path, monkaa_path, flyingthings3d_path, kitti_path = (
    os.path.join(DATASET_ROOT, name)
    for name in ("driving", "monkaa", "flyingthings3d", "kitti")
)
We implemented *-Multi versions of some transforms which can process left image, right image, and disparity map inputs. In these classes, cropping operations are applied to all images, while jittering and normalization are applied only to the RGB images. We also added SanitizeImageSizesMulti for cropping data, especially for the KITTI dataset. This transform crops the image and disparity data from the left and right to match the target width, and only from the top to match the target height (since KITTI has no disparity values in the top section of its images).
# Normalization statistics (ImageNet) and crop size shared by the pipelines below.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_CROP_SIZE = (256, 512)

# Pretraining on Scene Flow: random crop + normalization only.
pretrain_transforms = transforms.Compose([
    d_utils.ToTensorMulti(),
    d_utils.RandomCropMulti(_CROP_SIZE),
    d_utils.NormalizeMulti(mean=_NORM_MEAN, std=_NORM_STD),
])

# Finetuning on KITTI: additionally jitter colors for augmentation
# (jitter touches only the RGB images, not the disparity map).
finetune_transforms = transforms.Compose([
    d_utils.ToTensorMulti(),
    d_utils.RandomCropMulti(_CROP_SIZE),
    d_utils.ColorJitterMulti(brightness=0.3, hue=.05),
    d_utils.NormalizeMulti(mean=_NORM_MEAN, std=_NORM_STD),
])

# Evaluation: no random crop; sanitize the full-size images instead.
eval_transforms = transforms.Compose([
    d_utils.ToTensorMulti(),
    d_utils.SanitizeImageSizesMulti(),
    d_utils.NormalizeMulti(mean=_NORM_MEAN, std=_NORM_STD),
])
We analyzed the histogram of disparity values and the coverage achieved for specific maximum disparity parameters. We found that 192 would be a good maximum disparity value.
# Plot disparity-coverage statistics from precomputed files (see dataset_stats/).
print("For Kitti Dataset:")
kitti_fig_axes = plt.subplots(1, 2, figsize=(15, 5))
_ = d_utils.analyze_dataset_disparity_coverage(
    kitti_fig_axes,
    compute_zeros_seperately=True,  # keyword name as declared in dataset_utilities
    use_file="dataset_stats/kitti_analyze_precalculated.npy")

print("For Scene Flow Datasets:")
sceneflow_fig_axes = plt.subplots(1, 2, figsize=(15, 5))
_ = d_utils.analyze_dataset_disparity_coverage(
    sceneflow_fig_axes,
    use_file="dataset_stats/sceneflow_analyze_precalculated.npy")
# Build the three Scene Flow subsets (pretraining) and preview samples from each,
# keeping the construct-then-show order per subset.
_sceneflow = {}
for _key, _ctor, _path in (("driving", d_utils.DrivingDataset, driving_path),
                           ("monkaa", d_utils.MonkaaDataset, monkaa_path),
                           ("flyingthings3d", d_utils.Flyingthings3dDataset, flyingthings3d_path)):
    _ds = _ctor(_path, transforms=pretrain_transforms)
    _ = d_utils.imshow_samples_and_print_information(_ds, unnormalize=True)
    _sceneflow[_key] = _ds
driving_dataset = _sceneflow["driving"]
monkaa_dataset = _sceneflow["monkaa"]
flyingthings3d_dataset = _sceneflow["flyingthings3d"]

# KITTI (finetuning + held-out eval): separate transforms per split;
# rainbow colormap makes the sparse ground-truth disparity easier to see.
kitti_train_val, kitti_eval = d_utils.KittiDataset(
    kitti_path,
    train_transforms=finetune_transforms,
    eval_transforms=eval_transforms).split_dataset()
_ = d_utils.imshow_samples_and_print_information(kitti_train_val, unnormalize=True, rainbow=True)
_ = d_utils.imshow_samples_and_print_information(kitti_eval, unnormalize=True, rainbow=True)
We split Scene Flow into 95% for training and 5% for validation. For KITTI we split off 150 samples for training/validation and 50 samples for testing, then split those 150 samples into 80% for training and 20% for validation.
# 95/5 train/val split for each Scene Flow subset.
(train1, val1), (train2, val2), (train3, val3) = (
    d_utils.split_dataset(ds, first_part=0.95, second_part=0.05)
    for ds in (driving_dataset, monkaa_dataset, flyingthings3d_dataset))

# KITTI 150-sample train/val pool: 80/20 split.
finetuning_train_dataset, finetuning_val_dataset = d_utils.split_dataset(
    kitti_train_val,
    first_part=0.8,
    second_part=0.2)

# Merge the three Scene Flow subsets into single pretraining splits.
pretraining_train_dataset = torch.utils.data.ConcatDataset((train1, train2, train3))
pretraining_val_dataset = torch.utils.data.ConcatDataset((val1, val2, val3))

for _label, _split in (("finetuning_train_dataset", finetuning_train_dataset),
                       ("finetuning_val_dataset", finetuning_val_dataset),
                       ("pretraining_train_dataset", pretraining_train_dataset),
                       ("pretraining_val_dataset", pretraining_val_dataset)):
    print(f"Size of the {_label}:", len(_split))
For training, higher batch sizes can be set. Since test images are larger than training images, we recommend using small batch sizes for evaluation.
# Batch sizes; evaluation images are full-size, so eval batches stay small.
train_batch_size = 1
eval_batch_size = 1

# Build the four training/validation loaders in one pass.
(finetuning_train_dataloader, finetuning_val_dataloader,
 pretraining_train_dataloader, pretraining_val_dataloader) = (
    d_utils.make_dataloader(ds, batch_size=bs)
    for ds, bs in ((finetuning_train_dataset, train_batch_size),
                   (finetuning_val_dataset, eval_batch_size),
                   (pretraining_train_dataset, train_batch_size),
                   (pretraining_val_dataset, eval_batch_size)))

# Deterministic order for the held-out KITTI test split.
test_dataloader = d_utils.make_dataloader(kitti_eval, batch_size=1, shuffle=False)
# Select the compute device via the shared helper (CUDA when available).
device = t_utils.get_device()
print(f"Device is : {device}")
# Guard the GPU-name query: torch.cuda.current_device() raises a RuntimeError
# when CUDA is unavailable, which previously crashed CPU-only runs.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(torch.cuda.current_device()))
All experiments are collected in all_experiments.ipynb (and all_experiments.html) to keep this report clean. Below we show some code examples to give insight into how we ran our experiments systematically. Also, every experiment includes a CELL.txt showing how the experiment was done.
For Pretraining:
Trains GCnet with 192 max disparity for 10 epochs on the pretraining dataset.
# --- Pretraining example: GCnet, maxdisp=192, 10 epochs on Scene Flow ---
model = GCnet(192).to(device)
pretrain_optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
e1 = t_utils.Experiment(
    name="GCnet-pretraining",
    description="Pretraining GCnet with maxdisp=192 for 10 epochs",
    model=model,
    criterion=nn.SmoothL1Loss(),
    scheduler=None,
    optimizer=pretrain_optimizer,
    train_loader=pretraining_train_dataloader,
    val_loader=pretraining_val_dataloader,
    max_iter=len(pretraining_train_dataset) * 10,  # 10 passes over the training set
    val_interval=2500,
    vis_interval=500,
    save_interval=5000,
    device=device)
e1.train_model()
e1.save()
For Finetuning:
Loads a pretrained PSMNet model and finetunes it for 20000 iterations.
# --- Finetuning example: load a pretrained PSMNet checkpoint, then finetune on KITTI ---
checkpoint = torch.load("runs/PSM-pretraining-2021_09_22-08_31_27_192disp_10epoch_default/model_manual_save.pt")
model = PSMNet(192).to(device)
model.load_state_dict(checkpoint['model'])
finetune_optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
e1 = t_utils.Experiment(
    name="PSM-finetuning",
    description="Finetuning PSMNet with maxdisp=192 with lr=1e-5 on Pretrained model with 10 epochs.",
    model=model,
    criterion=nn.SmoothL1Loss(),
    scheduler=None,
    optimizer=finetune_optimizer,
    train_loader=finetuning_train_dataloader,
    val_loader=finetuning_val_dataloader,
    max_iter=20000,
    val_interval=200,
    vis_interval=100,
    save_interval=500,
    device=device)
e1.train_model()
e1.save()
To Continue an Experiment:
We made it easy to interrupt training and continue it later. The name, description, etc. parameters are loaded automatically.
# --- Resuming: Experiment.load() restores name/description/progress automatically ---
model = PSMNet(192).to(device)
resume_optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
e1 = t_utils.Experiment(
    model=model,
    criterion=nn.SmoothL1Loss(),
    scheduler=None,
    optimizer=resume_optimizer,
    train_loader=finetuning_train_dataloader,
    val_loader=finetuning_val_dataloader,
    device=device)
e1.load("runs/PSM-finetuning-2021_09_25-01_29_49_20000iter_lr1e-4/model_manual_save.pt")
e1.train_model()
e1.save()
To Continue an Experiment More:
Experiment parameters can be modified after loading.
# Raise the loaded experiment's iteration budget before resuming training.
e1.max_iter = 30000
e1.train_model()
e1.save()
# Best checkpoints selected from the experiment runs (see all_experiments.ipynb).
_BEST_RUNS = {
    "psmnet_sched": "runs/PSM-finetuning-2021_09_26-18_02_34_scheduler_lr1e-4/model_manual_save.pt",
    "psmnet": "runs/PSM-finetuning-2021_09_25-01_29_49_20000iter_lr1e-4/model_manual_save.pt",
    "gcnet": "runs/GCnet-finetuning-2021_09_27-19_30_42_lr1e-4/model_manual_save.pt",
}
best_psmnet_sched_path = _BEST_RUNS["psmnet_sched"]
best_psmnet_path = _BEST_RUNS["psmnet"]
best_gcnet_path = _BEST_RUNS["gcnet"]
# Qualitative disparity outputs of the best PSMNet and GCnet checkpoints.
def _restore(ctor, ckpt_path):
    # Rebuild a maxdisp=192 network and load its saved weights onto the device.
    net = ctor(192).to(device)
    net.load_state_dict(torch.load(ckpt_path)['model'])
    return net

model = _restore(PSMNet, best_psmnet_sched_path)
t_utils.show_model_outputs(model, test_dataloader, device, count=5, save_path="plots/fullpsmnet-outputs.svg")

model = _restore(GCnet, best_gcnet_path)
t_utils.show_model_outputs(model, test_dataloader, device, count=5, save_path="plots/gcnet-outputs.svg")
# Error comparison: best PSMNet (scheduler) vs best GCnet on test sample idx=15.
psm_ckpt = torch.load(best_psmnet_sched_path)
model = PSMNet(192).to(device)
model.load_state_dict(psm_ckpt['model'])
gc_ckpt = torch.load(best_gcnet_path)
model2 = GCnet(192).to(device)
model2.load_state_dict(gc_ckpt['model'])
t_utils.show_compare_model_error(model, model2, "PSMNet", "GCNet", test_dataloader, device, idx=15, save_path="plots/compare/psmnet_vs_gcnet.svg")

# Scheduler effect: plain finetuned PSMNet vs scheduler-finetuned PSMNet on idx=30.
plain_ckpt = torch.load(best_psmnet_path)
model = PSMNet(192).to(device)
model.load_state_dict(plain_ckpt['model'])
sched_ckpt = torch.load(best_psmnet_sched_path)
model2 = PSMNet(192).to(device)
model2.load_state_dict(sched_ckpt['model'])
t_utils.show_compare_model_error(model, model2, "PSMNet without Scheduler", "PSMNet finetuned with Scheduler", test_dataloader, device, idx=30, save_path="plots/compare/scheduler_effect.svg")
Other experiments can be found in all_experiments.ipynb (and all_experiments.html); we separated them out to keep this report clean.
tensorboard --samples_per_plugin="scalar=10000,images=200" --logdir runs/


# Quantitative evaluation of the three best checkpoints on the held-out
# KITTI test split (SmoothL1 loss via the shared evaluation helper).
for ctor, ckpt_path in ((PSMNet, best_psmnet_sched_path),
                        (PSMNet, best_psmnet_path),
                        (GCnet, best_gcnet_path)):
    checkpoint = torch.load(ckpt_path)
    model = ctor(192).to(device)
    model.load_state_dict(checkpoint['model'])
    t_utils.evaluate_model(model, test_dataloader, device, nn.SmoothL1Loss())
# Optional per-sample error visualizations, e.g.:
#t_utils.show_model_error(model, test_dataloader, device, count=10, save_path="plots/full-psmnet")
#t_utils.show_model_error(model, test_dataloader, device, count=10, save_path="plots/gcnet")